/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.hbase.index.covered.filter;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.util.PhoenixKeyValueUtil;

/**
 * Applies the delete markers it sees to the cells that follow them, letting through only cells
 * that are not covered by a previously seen delete. This is similar to some of the work the
 * ScanQueryMatcher does to ensure correct visibility of keys based on deletes.
 * <p>
 * The delete {@link KeyValue}s handled here ({@link Type#DeleteFamily}, {@link Type#DeleteColumn}
 * and {@link Type#Delete}) never pass through this filter - they are always skipped.
 * <p>
 * Note there is a little bit of conceptually odd behavior (though it matches the HBase
 * specifications) around point deletes ({@link KeyValue}s of type {@link Type#Delete}). These
 * deletes only apply to a single {@link KeyValue} at a single point in time - they essentially
 * completely 'cover' the existing {@link Put} at that timestamp. However, they don't 'cover' any
 * other keyvalues at older timestamps. Therefore, if there is a point-delete at ts = 5, and puts
 * at ts = 4 and ts = 5, we will only allow the put at ts = 4.
 * <p>
 * Expects {@link KeyValue}s to arrive in sorted order, with 'Delete' {@link Type}
 * {@link KeyValue}s ({@link Type#DeleteColumn}, {@link Type#DeleteFamily}, {@link Type#Delete})
 * before their regular {@link Type#Put} counterparts.
 */
public class ApplyAndFilterDeletesFilter extends FilterBase {

  /** Families of interest, kept sorted so the successor of a family can be binary-searched. */
  List<ImmutableBytesPtr> families;
  /** Delete markers seen so far that may still cover upcoming cells. */
  private final DeleteTracker coveringDelete = new DeleteTracker();
  /** Hinter selected by the most recent SEEK_NEXT_USING_HINT answer of {@link #filterCell}. */
  private Hinter currentHint;
  private DeleteColumnHinter columnHint = new DeleteColumnHinter();
  private DeleteFamilyHinter familyHint = new DeleteFamilyHinter();

  /**
   * Set up the filter to only include the given families. This allows us to seek intelligently
   * past families we don't care about.
   * @param families families to include; copied into an internal sorted list
   */
  public ApplyAndFilterDeletesFilter(Set<ImmutableBytesPtr> families) {
    List<ImmutableBytesPtr> sortedFamilies = new ArrayList<>(families);
    Collections.sort(sortedFamilies);
    this.families = sortedFamilies;
  }

  /** Returns the tracker holding the delete markers currently considered covering. */
  public DeleteTracker getDeleteTracker() {
    return coveringDelete;
  }

  /**
   * Find the family that sorts immediately after the given one in the configured family list.
   * @param family family to look up; it does not have to be one of the configured families
   * @return the next configured family, or <tt>null</tt> if there is none
   */
  private ImmutableBytesPtr getNextFamily(ImmutableBytesPtr family) {
    int found = Collections.binarySearch(families, family);
    // On an exact hit the successor is simply the following slot. On a miss (unlikely, but just in
    // case) binarySearch returns (-(insertion point) - 1), and the insertion point is already the
    // position of the first family sorting after the requested one.
    int successor = found >= 0 ? found + 1 : -found - 1;
    return successor < families.size() ? families.get(successor) : null;
  }

  /** Forgets every delete marker tracked so far. */
  @Override
  public void reset() {
    this.coveringDelete.reset();
  }

  /**
   * Delegates to the hinter chosen by the last {@link #filterCell(Cell)} call that answered
   * SEEK_NEXT_USING_HINT.
   */
  @Override
  public Cell getNextCellHint(Cell peeked) {
    return currentHint.getHint(PhoenixKeyValueUtil.maybeCopyCell(peeked));
  }

  // No @Override for HBase 3 compatibility
  public ReturnCode filterKeyValue(Cell next) {
    return this.filterCell(next);
  }

  /**
   * Decide the fate of the next cell: delete markers are remembered and skipped, any other cell is
   * checked against the remembered markers.
   * @param next the cell to examine
   * @return SKIP for delete markers and for cells hit by a point delete, SEEK_NEXT_USING_HINT for
   *         cells covered by a family or column delete, INCLUDE otherwise
   */
  @Override
  public ReturnCode filterCell(Cell next) {
    KeyValue nextKV = PhoenixKeyValueUtil.maybeCopyCell(next);
    switch (next.getType()) {
      case DeleteFamily: {
        /*
         * DeleteFamily will always sort first because those KVs (we assume) don't have qualifiers
         * (or rather are null). Therefore, we have to keep a hold of the delete family until we get
         * past the cells of that family.
         *
         * A marker for a different family means we have passed all kvs of the previous family, so
         * every delete tracked so far can be dropped and this marker takes over. We could also see
         * several DeleteFamily markers for the same row and family (e.g. upsert row+family, delete
         * it, upsert again, delete again), in which case we keep the first one since its timestamp
         * dominates.
         */
        KeyValue tracked = coveringDelete.deleteFamily;
        boolean sameFamilyTracked = tracked != null && CellUtil.matchingFamily(tracked, nextKV);
        if (!sameFamilyTracked) {
          coveringDelete.reset();
          coveringDelete.deleteFamily = nextKV;
        }
        return ReturnCode.SKIP;
      }
      case DeleteColumn:
        // Similar to DeleteFamily: all newer deletes/puts for the previous column have been seen
        // by now, so this marker replaces the previously tracked column delete and any pending
        // point delete is dropped.
        coveringDelete.pointDelete = null;
        coveringDelete.deleteColumn = nextKV;
        return ReturnCode.SKIP;
      case Delete:
        // A point delete only removes a single column value. The marker itself is skipped, but we
        // keep a reference to it so it can be compared against the next valid entry.
        coveringDelete.pointDelete = nextKV;
        return ReturnCode.SKIP;
      default:
        // NOTE(review): every other cell type is handled below as a regular value - confirm that
        // no further delete marker types can reach this filter.
        break;
    }

    // no covering deletes at all, nothing can hide this cell
    if (coveringDelete.empty()) {
      return ReturnCode.INCLUDE;
    }

    // The checks below must run in exactly this order: the family and column checks clear their
    // tracked marker when the cell has moved on to another family/column.
    if (coveringDelete.matchesFamily(nextKV)) {
      // hint to the next family
      currentHint = familyHint;
      return ReturnCode.SEEK_NEXT_USING_HINT;
    }
    if (coveringDelete.matchesColumn(nextKV)) {
      // hint to the next column
      currentHint = columnHint;
      return ReturnCode.SEEK_NEXT_USING_HINT;
    }

    // only a point delete is left to hide the cell; if it doesn't match either, we are done
    return coveringDelete.matchesPoint(nextKV) ? ReturnCode.SKIP : ReturnCode.INCLUDE;
  }

  /**
   * Get the next hint for a given peeked keyvalue
   */
  interface Hinter {
    Cell getHint(Cell peek);
  }

  /**
   * Entire family has been deleted, so either seek to the first key of the next configured family
   * on the same row, or, if no further family is configured, return {@link KeyValue#LOWESTKEY}.
   */
  class DeleteFamilyHinter implements Hinter {

    @Override
    public Cell getHint(Cell peeked) {
      // check to see if we have another family to seek to
      ImmutableBytesPtr currentFamily = new ImmutableBytesPtr(peeked.getFamilyArray(),
        peeked.getFamilyOffset(), peeked.getFamilyLength());
      ImmutableBytesPtr nextFamily = getNextFamily(currentFamily);
      if (nextFamily != null) {
        // there is a valid family, so we should seek to its first possible key on this row
        return org.apache.hadoop.hbase.KeyValueUtil.createFirstOnRow(peeked.getRowArray(),
          peeked.getRowOffset(), peeked.getRowLength(), nextFamily.get(), nextFamily.getOffset(),
          nextFamily.getLength(), HConstants.EMPTY_BYTE_ARRAY, 0, 0);
      }
      // NOTE(review): earlier documentation described this as seeking to the "last possible key"
      // (a best guess); confirm that LOWESTKEY has the intended effect as a seek hint.
      return KeyValue.LOWESTKEY;
    }

  }

  /**
   * Hint the last possible key of the given keyvalue's row/family/qualifier, i.e. move on to the
   * next column-qualifier. We can't be smart like in the ScanQueryMatcher since we don't know the
   * columns ahead of time.
   */
  private static class DeleteColumnHinter implements Hinter {

    @Override
    public Cell getHint(Cell kv) {
      return org.apache.hadoop.hbase.KeyValueUtil.createLastOnRow(
        kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
        kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
        kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
    }
  }

  /**
   * Holds at most one delete marker of each kind (family, column, point) and answers whether a
   * given {@link KeyValue} is covered by it.
   */
  public static class DeleteTracker {

    public KeyValue deleteFamily;
    public KeyValue deleteColumn;
    public KeyValue pointDelete;

    /** Drops all tracked delete markers. */
    public void reset() {
      this.pointDelete = null;
      this.deleteColumn = null;
      this.deleteFamily = null;
    }

    /**
     * Check to see if we should skip this {@link KeyValue} based on the family.
     * <p>
     * Internally, also resets the currently tracked "Delete Family" marker if the keyvalue is in
     * another family (since CFs sort lexicographically, we can discard the current marker since it
     * must not be applicable to any more kvs in a linear scan).
     * @return <tt>true</tt> if this {@link KeyValue} is in the tracked family and its timestamp is
     *         not newer than the marker's.
     */
    public boolean matchesFamily(KeyValue next) {
      if (deleteFamily == null) {
        return false;
      }
      if (!CellUtil.matchingFamily(deleteFamily, next)) {
        // we are on to another family, so the marker can no longer apply
        deleteFamily = null;
        return false;
      }
      // same family: covered only if it falls within the timestamp range
      return deleteFamily.getTimestamp() >= next.getTimestamp();
    }

    /**
     * Check to see if we should skip this {@link KeyValue} based on the tracked column delete.
     * <p>
     * The tracked "Delete Column" marker is cleared when the keyvalue belongs to a different
     * family or qualifier.
     * @return <tt>true</tt> if this {@link KeyValue} is in the tracked column and its timestamp is
     *         not newer than the marker's.
     */
    public boolean matchesColumn(KeyValue next) {
      if (deleteColumn == null) {
        return false;
      }
      boolean sameColumn = CellUtil.matchingFamily(deleteColumn, next)
        && CellUtil.matchingQualifier(deleteColumn, next);
      if (!sameColumn) {
        deleteColumn = null;
        return false;
      }
      // same column: covered only if it falls within the timestamp range
      return deleteColumn.getTimestamp() >= next.getTimestamp();
    }

    /**
     * Check to see if this {@link KeyValue} is the exact one referenced by the tracked point
     * delete.
     * <p>
     * Point deletes only apply to the exact KV that they reference, so we only need to ensure that
     * the timestamp matches exactly. Because we sort by timestamp first, either the next keyvalue
     * has the exact timestamp or is an older (smaller) timestamp, and we can allow that one.
     * @return <tt>true</tt> if family, qualifier and timestamp all equal those of the point delete.
     */
    public boolean matchesPoint(KeyValue next) {
      if (pointDelete == null) {
        return false;
      }
      boolean sameColumn = CellUtil.matchingFamily(pointDelete, next)
        && CellUtil.matchingQualifier(pointDelete, next);
      if (!sameColumn) {
        // a different column leaves the tracked point delete untouched
        return false;
      }
      if (pointDelete.getTimestamp() == next.getTimestamp()) {
        return true;
      }
      // clear the point delete since the TS must not be matching
      pointDelete = null;
      return false;
    }

    /** Returns <tt>true</tt> if no delete has been set */
    public boolean empty() {
      return deleteFamily == null && deleteColumn == null && pointDelete == null;
    }
  }
}
